import pickle
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import h5py
import statistics
import math
#this sets the size of the plot to something useful
plt.rcParams["figure.figsize"] = (20,15)
# open the file of interest, and use pickle loading
infile = open ("higgs_100000_pt_1000_1200.pkl",'rb')
higgs = pickle.load(infile)
infile2 = open ("qcd_100000_pt_1000_1200.pkl",'rb')
qcd = pickle.load(infile2)
bins = 500
plt.figure()
plt.hist([qcd['mass'][:]], bins, stacked = True, density = True, label = 'QCD', color='blue')
plt.hist([higgs['mass'][:]], bins, stacked = True, density = True, label = 'Higgs', color='green')
plt.tick_params(labelsize = 22)
plt.xlabel('Mass', fontsize=30)
plt.ylabel('Count', fontsize=30)
plt.yscale("log")
plt.title('Invariant Mass Distribution', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
higgs_mass = 126 #Approximation based on histogram
print("Mass of the Higgs Boson: ", higgs_mass)
mu = statistics.mean(qcd['mass'][:])
print("Mean:" , mu)
value = stats.poisson.cdf(higgs_mass, mu)
print("Expected Significance: ", value, " sigma")
nhiggs = (sum(higgs['mass'][:]) / sum(higgs['mass'][:]))*50
nqcd_sqrt = ((sum(qcd['mass'][:])/sum(qcd['mass'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
Using a poisson distribution we get an expected significance of 3.1e-05 for a higgs mass of 126. Now if I compare the Nhiggs to the sqrt(NQCD) we can see that Nhiggs is larger. Shown with the ratio being 1.12. This ratio represents how much the higgs signal and the QCD background overlap. With the data given to us, we can say that the higher value of the ratio corresponds with less overlapping overall.
mu = statistics.mean(higgs['mass'][:])
bins = 1000
bottom = mu - 10
upper = mu + 10
background1 = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
cut1 = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
plt.figure()
plt.hist(background1[:], bins, alpha=0.5, color="blue", density = True, label='QCD')
plt.hist(cut1[:] , bins, alpha=0.5, density = True, color="green", label='Higgs')
plt.tick_params(labelsize = 22)
plt.xlabel('Mass', fontsize=30)
plt.ylabel('Count', fontsize=30)
plt.yscale("log")
plt.title('Mass Cut 1', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(cut1[:]) / sum(higgs['mass'][:]))*50
#print(nhiggs)
nqcd_root = ((sum(background1[:])/sum(qcd['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = mu - 5
upper = mu + 5
background1 = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
cut1 = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
plt.figure()
plt.hist(background1[:], bins, alpha=0.5, color="blue", density = True, label='QCD')
plt.hist(cut1[:] , bins, alpha=0.5, density = True, color="green", label='Higgs')
plt.tick_params(labelsize = 22)
plt.xlabel('Mass', fontsize=30)
plt.ylabel('Count', fontsize=30)
plt.yscale("log")
plt.title('Mass Cut 2', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(cut1[:]) / sum(higgs['mass'][:]))*50
#print(nhiggs)
nqcd_root = ((sum(background1[:])/sum(qcd['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 124.5
upper = 128
background1 = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
cut1 = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
plt.figure()
plt.hist(background1[:], bins, alpha=0.5, color="blue", density = True, label='QCD')
plt.hist(cut1[:] , bins, alpha=0.5, density = True, color="green", label='Higgs')
plt.tick_params(labelsize = 22)
plt.xlabel('Mass', fontsize=30)
plt.ylabel('Count', fontsize=30)
plt.yscale("log")
plt.title('Mass Cut 3', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(cut1[:]) / sum(higgs['mass'][:]))*50
#print(nhiggs)
nqcd_root = ((sum(background1[:])/sum(qcd['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 124.9
upper = 125.1
background1 = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
cut1 = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
plt.figure()
plt.hist(background1[:], bins, alpha=0.5, color="blue", density = True, label='QCD')
plt.hist(cut1[:] , bins, alpha=0.5, density = True, color="green", label='Higgs')
plt.tick_params(labelsize = 22)
plt.xlabel('Mass', fontsize=30)
plt.ylabel('Count', fontsize=30)
plt.yscale("log")
plt.title('Mass Cut 4', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(cut1[:]) / sum(higgs['mass'][:]))*50
#print(nhiggs)
nqcd_root = ((sum(background1[:])/sum(qcd['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
After trying to cut the data systematically by only taking data a certain amount of standard deviations from the mean of the signal data I just made decisions based on my eyesight. Shown above the best bounds was from 124.5 to 128 which yielded the highest expected significance of 5.21. One thing that was fairly interesting was that setting the bounds inside the spike of the mass data wasn't the most effective when it came to maximizing the sigificance ratio (it yielded around a 3.69 sigma). Which makes sense overall that one cannot trim too much data because then you are cutting more of the signal dataset than the background which was learned several labs ago.
There are several features to test. Eta, 2-point ECF, 3-point ECF, 3 to 3 point ECF, angularity, 1-subjettiness, 2-subjettiness, 3-subjettiness, 21-subjettiness, 32-subjettiness, transverse momentum, and phi. Let's look at all of them shall we?
plt.hist(higgs['eta'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['eta'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(higgs['eta'][:]) / sum(higgs['eta'][:]))*50
nqcd_sqrt = ((sum(qcd['eta'][:])/sum(qcd['eta'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['pt'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['pt'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('Transverse Momentum', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
plt.hist(higgs['phi'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['phi'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('phi', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(higgs['phi'][:]) / sum(higgs['phi'][:]))*50
nqcd_sqrt = ((sum(qcd['phi'][:])/sum(qcd['phi'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['ee2'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['ee2'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('2-point ECF', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(higgs['ee2'][:]) / sum(higgs['ee2'][:]))*50
nqcd_sqrt = ((sum(qcd['ee2'][:])/sum(qcd['ee2'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['ee3'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['ee3'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('3-point ECF', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 0.02)
plt.show()
nhiggs = (sum(higgs['ee3'][:]) / sum(higgs['ee3'][:]))*50
nqcd_sqrt = ((sum(qcd['ee3'][:])/sum(qcd['ee3'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['d2'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['d2'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 60)
plt.show()
nhiggs = (sum(higgs['d2'][:]) / sum(higgs['d2'][:]))*50
nqcd_sqrt = ((sum(qcd['d2'][:])/sum(qcd['d2'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['angularity'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['angularity'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('Angularity', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, .05)
plt.show()
nhiggs = (sum(higgs['angularity'][:]) / sum(higgs['angularity'][:]))*50
nqcd_sqrt = ((sum(qcd['angularity'][:])/sum(qcd['angularity'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['t1'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['t1'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('1-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 3.5)
plt.show()
nhiggs = (sum(higgs['t1'][:]) / sum(higgs['t1'][:]))*50
nqcd_sqrt = ((sum(qcd['t1'][:])/sum(qcd['t1'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['t2'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['t2'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 2)
plt.show()
nhiggs = (sum(higgs['t2'][:]) / sum(higgs['t2'][:]))*50
nqcd_sqrt = ((sum(qcd['t2'][:])/sum(qcd['t2'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['t3'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['t3'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 1)
plt.show()
nhiggs = (sum(higgs['t3'][:]) / sum(higgs['t3'][:]))*50
nqcd_sqrt = ((sum(qcd['t3'][:])/sum(qcd['t3'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['t21'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['t21'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 1)
plt.show()
nhiggs = (sum(higgs['t21'][:]) / sum(higgs['t21'][:]))*50
nqcd_sqrt = ((sum(qcd['t21'][:])/sum(qcd['t21'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['t32'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['t32'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('32-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 1)
plt.show()
nhiggs = (sum(higgs['t32'][:]) / sum(higgs['t32'][:]))*50
nqcd_sqrt = ((sum(qcd['t32'][:])/sum(qcd['t32'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
plt.hist(higgs['KtDeltaR'][:], bins, alpha=0.5, color="green", density = True, label='Higgs')
plt.hist(qcd['KtDeltaR'][:] , bins, alpha=0.5, density = True, color="blue", label='QCD')
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.xlim(0, 1)
plt.show()
nhiggs = (sum(higgs['KtDeltaR'][:]) / sum(higgs['KtDeltaR'][:]))*50
nqcd_sqrt = ((sum(qcd['KtDeltaR'][:])/sum(qcd['KtDeltaR'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("ratio: ", nhiggs/nqcd_sqrt)
The expected significance all yielded the same value of 1.18 sigma which is less than the optimal cut for the mass.
We must identify additional features to further improve the expected significance. From the previous labs I will only be looking closely at features that already have have a decent amount of discrimination and making cuts on those. These would include: eta, 2-point ECF, 3-point ECF, 3 to 2 point ECF, theta, angularity, 3-subjettiness, 21-subjettiness, KtDeltaR, and 2-subjettiness). Using the good old eye ball test on the graphs from set A I concluded the initial cuts I will make to those graphs.
bottom = -1
upper = 1
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['eta'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['eta'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['eta'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['eta'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0.04
upper = 0.07
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee2'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('2-point ECF', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee2'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['ee2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0.0001
upper = 0.0004
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee3'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('3-point ECF', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee3'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['ee3'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0
upper = 2
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['d2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['d2'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF ratio', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['d2'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['d2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0
upper = 0.001
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['angularity'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['angularity'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('Angularity', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['angularity'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['angularity'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0.1
upper = 0.3
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t2'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t2'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['t2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0
upper = 0.2
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t3'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t3'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['t3'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0
upper = 0.42
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t21'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t21'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t21'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['t21'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
bottom = 0.18
upper = 0.275
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['KtDeltaR'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['KtDeltaR'][:]))
plt.hist(background[:], 200, alpha=0.5, color="blue", density = True, label='background')
plt.hist(signal[:] , 200, alpha=0.5, density = True, color="green", label='signal')
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t21'][:]))*50
nqcd_root = ((sum(background[:])/sum(qcd['t21'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Expected Significance: ", ratio, "sigma")
Of course while I was creating these cuts if the initial guess wasn't good enough they were modified in order to find a better significance ratio. The eta cut was the best by far with 11.85 sigma, which is surprisingly large. The 3-poiunt ECF was pretty high with around 4.95 sigma; this result made more sense in comparison to the output given from the eta cut.
There are two features that can assist me to get the best significance which is the 3 point ECF and the mass; used from problem 3. Initially the significance ratio before the event selection was 1.118 sigma which was consistent with all the features before making the cuts. This could be explained that we were given 100k jets for sigmal and 100k jets for the background. Which would yield a ratio of 1. When making data cuts to features with a larger degree of discrimination the significance ratio increases. In the mass example if we were to cut it between 124.5 and 128 the significance went up to 5.21 sigma. For the 3-point ECF, making a cut in the range of 0.001 and 0.004, the significance went up to 4.96 sigma.
Something interesting between my partner's data and mine is that they producted different outcomes after the cuts and they had different variables that yielded the best significance. Unlike in mine with the 3-point ECF and mass, they had the eta cut and the mass cut at 3.7 sigma and 3.1 sigma respectively.
nhiggs = (sum(higgs['mass'][:]) / sum(higgs['mass'][:]))*50
nqcd_sqrt = ((sum(qcd['mass'][:])/sum(qcd['mass'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NQCD): ", nqcd_sqrt)
print("Ratio before Event Selection: ", nhiggs/nqcd_sqrt)
bottom = 124.5
upper = 128
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
nhiggs = (sum(signal) / sum(higgs['mass'][:]))*50
nqcd_sqrt = ((sum(background)/sum(qcd['mass'][:]))*2000)**(0.5)
print("Ratio after Event Selection(mass): ", nhiggs/nqcd_sqrt)
bottom = 0.0001
upper = 0.0004
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee3'][:]))
nhiggs = (sum(signal[:]) / sum(higgs['ee3'][:]))*50
nqcd_sqrt = ((sum(background[:])/sum(qcd['ee3'][:]))*2000)**(0.5)
print("Ratio after Event Selection(ee3): ", nhiggs/nqcd_sqrt)
import pandas as pd
highLumi = pd.read_hdf('data_highLumi_pt_1000_1200.h5')
lowLumi = pd.read_hdf('data_lowLumi_pt_1000_1200.h5')
Below red represents the high luminosity data set, green represents the signal data, and blue represents the background data set. To better show the trend and relationship between observed, signal and background the x-axis has been truncated between 20 and 450 (eliminates vast amounts of white space). The y-axis has also been changed to a log plot to better highlight the differences and trends between the 3 datasets. As seen in the plot, the mass feature of the observed data closely follows the expected background.
bins = 500
plt.hist(highLumi['mass'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['mass'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['mass'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.xlim(20,450)
plt.yscale('log')
plt.tick_params(labelsize = 22)
plt.title('Mass', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
nhiggs = (sum(higgs['mass'][:]) / sum(higgs['mass'][:]))*50
nqcd_sqrt = ((sum(highLumi['mass'][:])/sum(highLumi['mass'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NObserved): ", nqcd_sqrt)
ratio = nhiggs/nqcd_sqrt
print("Observed Significance: ", ratio, "sigma")
It seems that the observed data is much more fragmented(spiky) than the expected background data. This means that the transverse momentum of pp collisions is not as predictable as the expected background makes it out to be. Also certain values of transverse momentum are more favored than others.
bins = 500
plt.hist(highLumi['pt'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['pt'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['pt'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.tick_params(labelsize = 22)
plt.title('pT(Transverse Momentum)', fontsize=40)
plt.legend(loc='best', fontsize=30)
plt.show()
Observed data is spiky but generally follows the background data(looks relatively normal). It is interesting to note that the highest spikes are around the mean(indicating that some values are indeed favored for the eta angle).
bins = 500
plt.hist(highLumi['eta'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['eta'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['eta'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.show()
Both the expected signal and expected background creates an even random spread for the phi angle. But the the observed data is spiky and not so consistent.
bins = 500
plt.hist(highLumi['phi'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['phi'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['phi'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Phi', fontsize=40)
plt.show()
For the 2-point ECF I chose a logscale and limited the x-axis between -0.01 and 0.23 to better show the trend of the expected background and observed data. As per usual the observed data is spiky, but generally follows the expected background trend.
bins = 500
plt.hist(highLumi['ee2'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['ee2'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['ee2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.yscale("log")
plt.xlim(-0.01, 0.23)
plt.title('2-point ECF', fontsize=40)
plt.show()
For the 3-point ECF I plotted a log plot, and limited the x-axis from -0.001 to 0.08 to better show trend. It is interesting to note that the expected background predicts spikiness and outliers, but the observed data starts getting spiky much sooner than the expected background(~0 - 0.01). The outlier values are also relatively favored, since their spikes are higher than the expected background spikes.
bins = 500
plt.hist(highLumi['ee3'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['ee3'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['ee3'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.yscale("log")
plt.xlim(-0.001, 0.08)
plt.title('3-point ECF', fontsize=40)
plt.show()
Plotted on a log scale, and limited between -1 to 100 to better show trend. Observed generally follows expected background up until 38ish. It is also interesting to note that the observed data starts getting spiky approximately when the expected signal also gets spiky. Once again we see the same trend that specific outliers are favored (relatively high probability), as they rise above the expected background.
bins = 500
plt.hist(highLumi['d2'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['d2'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['d2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.yscale("log")
plt.xlim(-1,100)
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF ratio', fontsize=40)
plt.show()
Angularity is plotted on the log scale and between -0.001 and 0.02 to better analyze trend. Same trend is observed where the observed data generally follows the expected background, but as we get further away from the mean, the observed data becomes increasingly more spiky. As it gets spiky, certain values consistently rise above the expected background.
bins = 500
plt.hist(highLumi['angularity'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['angularity'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['angularity'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.yscale("log")
plt.xlim(-0.001,0.02)
plt.title('Angularity', fontsize=40)
plt.show()
1-subjettiness is limited between 0.5 and 2. The observed data does a great job of following the expected background, there is minimal overlap between the observed data and the expected signal. Looking down this is true for 2,3,21,32 subjettiness and KtDeltaR. The observed data is always more spiky (inconsistent), but generally tries to follow the expected background, and doesn't overlap with the expected signal.
bins = 500
plt.hist(highLumi['t1'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['t1'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['t1'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(0.5,2)
plt.tick_params(labelsize = 22)
plt.title('1-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(highLumi['t2'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['t2'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['t2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(0,1.1)
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(highLumi['t3'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['t3'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['t3'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(highLumi['t21'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['t21'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['t21'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(highLumi['t32'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['t32'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['t32'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('32-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(highLumi['KtDeltaR'][:], bins, alpha=0.4, color='red', density = True, label="Observed Data")
plt.hist(higgs['KtDeltaR'][:], bins, alpha=0.4, color="green", density = True, label="Expected Signal")
plt.hist(qcd['KtDeltaR'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.show()
General analysis is that across all features, the high luminosity observed data follows the expected background, but it is also much more spiky and inconsistent compared to the nice expected trends the qcd background data set predicts. The expected significance of the expected signal and background is 1.118033988749895 sigma. While the observed significance before mass cuts is 1.118033988749895 sigma. These are completely equivalent and that is to be expected because the calculation for significance uses a ratio of data points eliminated vs. data points maintained. Since no mass cuts have been made yet, both the observed and background data start with 100k data points, resulting in equal significance values. In lab 7 I went through the expected signal and background plots and chose 10 features that showed to be highly discriminatory, and applied event selection to said features. This event selection allowed for the expected significance of the tested features to go up. I will use these same 10 features, with tested optimal cuts, and replace the background with the high luminosity observed data to evaluate how these cuts affect the observed significance of our dataset compared to the default expectation value of: 1.118 sigma.
Making a mass cut from 124.9 to 125.1 (optimal for qcd), shows that the expected background predicts the spiky nature of the observed data, but not to the same degree. Certain mass values are extremely favored, and this generally points to the discretized nature of mass values.
bottom = 124.9
upper = 125.1
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['mass'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.yscale("log")
plt.tick_params(labelsize = 22)
plt.title('Mass', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['mass'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = -1
upper = 1
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['eta'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['eta'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['eta'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['eta'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['eta'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.04
upper = 0.07
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['ee2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('2-point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['ee2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = .0001
upper = .0004
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['ee3'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee3'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right",fontsize=30)
plt.tick_params(labelsize = 22)
plt.yscale("log")
plt.title('3-point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee3'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['ee3'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 2
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['d2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['d2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['d2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['d2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['d2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.001
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['angularity'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['angularity'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['angularity'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Angularity', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['angularity'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['angularity'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.1
upper = 0.3
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['t2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['t2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.2
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['t3'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t3'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t3'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['t3'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.42
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['t21'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t21'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t21'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t21'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['t21'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.18
upper = 0.275
observed = list(filter(lambda a: a >= bottom and a <= upper, highLumi['KtDeltaR'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['KtDeltaR'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['KtDeltaR'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['KtDeltaR'][:]))*50
nqcd_root = ((sum(observed[:])/sum(highLumi['KtDeltaR'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
Similarly to lab 7, the expected significance of Eta scored an absurd value of 37.2 sigma, therefore for further analysis this feature will be disregarded. Since the observed data followed the expected background closely, a lot of the observed signficance values calculated are similar to the expectation significance values calculated in the previous lab.
| QCD Expected Background(sigma) | High Luminosity Observed(sigma) | |
|---|---|---|
| mass | 3.686 | 2.7658 |
| eta | 11.8528 | 37.256 |
| ee2 | 2.314 | 2.180 |
| ee3 | 4.955 | 4.722 |
| d2 | 2.508 | 2.402 |
| angularity | 2.146 | 2.070 |
| t2 | 1.857 | 1.800 |
| t3 | 2.1084 | 2.0000 |
| t21 | 1.8464 | 1.814 |
| KtDeltaR | 2.2456 | 1.8715 |
All 10 selected features have optimally selected cuts to the expected background data. Disregarding the anomaly of eta, the remaining 9 features all have the expected signficance higher than the observed significance. This makes sense because the optimal event selection is based on the background data set QCD. Although the observed data generally follows the background data, since event selection is optimized on the expected background(not the observed data), the expected significance should generally be slightly higher than the observed significance. The 3-point ECF ratio(ee3) still proves to be the highest observed significance of 4.722 sigma across all 10 selected features.
bins = 500
plt.hist(lowLumi['mass'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['mass'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['mass'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(20,450)
plt.yscale('log')
plt.tick_params(labelsize = 22)
plt.title('Mass', fontsize=40)
plt.show()
nhiggs = (sum(higgs['mass'][:]) / sum(higgs['mass'][:]))*50
nqcd_sqrt = ((sum(lowLumi['mass'][:])/sum(lowLumi['mass'][:]))*2000)**(0.5)
print("NHiggs: ", nhiggs)
print("(√NObserved): ", nqcd_sqrt)
ratio = nhiggs/nqcd_sqrt
print("Observed Significance: ", ratio, "sigma")
bins = 500
plt.hist(lowLumi['eta'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['eta'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['eta'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['phi'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['phi'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['phi'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Phi', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi["ee2"][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['ee2'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['ee2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('2-point ECF', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['ee3'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['ee3'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['ee3'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(-0.0001, 0.01)
plt.yscale("log")
plt.tick_params(labelsize = 22)
plt.title('3-point ECF', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['d2'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['d2'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['d2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(-5,75)
plt.yscale('log')
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF Ratio', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['angularity'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['angularity'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['angularity'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.yscale('log')
plt.tick_params(labelsize = 22)
plt.title('Angularity', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['t1'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['t1'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['t1'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(0,2.5)
plt.tick_params(labelsize = 22)
plt.title('1-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['t2'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['t2'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['t2'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.xlim(0,1.3)
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['t3'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['t3'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['t3'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['t21'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['t21'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['t21'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['t32'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['t32'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['t32'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('32-subjettiness', fontsize=40)
plt.show()
bins = 500
plt.hist(lowLumi['KtDeltaR'][:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(higgs['KtDeltaR'][:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(qcd['KtDeltaR'][:] , bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.show()
Similar to the high luminosity observed data, the low luminosity data is spiky and it seems like the values are discretized for all features. It generally follows the background, but the high luminosity observed data follows the background much better than the low luminosity observed data. The low luminosity is also much more spiky, and is spiky throughout the data set. While the high luminosity data has its data points condensed around the mean, and becomes more fragmented when it reaches the outliers, the low luminosity data set is fragmented throughout. The following of the expected background is poor, looking at 21-subjettiness as an example, low luminosity does a poor job avoiding the expected signal dataset, and there are often data points overlapping with the expected signal. The observed significance is still 1.118 sigma because not cuts have been made.
bottom = 124.9
upper = 125.1
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['mass'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['mass'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['mass'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.yscale("log")
plt.xlim(124.9,125.1)
plt.tick_params(labelsize = 22)
plt.title('Mass', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['mass'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['mass'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = -1
upper = 1
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['eta'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['eta'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['eta'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Eta', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['eta'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['eta'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.04
upper = 0.07
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['ee2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('2-point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['ee2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = .0001
upper = .0004
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['ee3'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['ee3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['ee3'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.yscale("log")
plt.title('3-point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['ee3'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['ee3'][:]))*2000)**(0.5)
ratioee3 = nhiggs / nqcd_root
bottom = 0
upper = 2
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['d2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['d2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['d2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3 to 2 point ECF', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['d2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['d2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.001
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['angularity'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['angularity'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['angularity'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('Angularity', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['angularity'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['angularity'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.1
upper = 0.3
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['t2'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t2'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t2'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('2-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t2'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['t2'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.2
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['t3'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t3'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t3'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('3-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t3'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['t3'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0
upper = 0.42
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['t21'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['t21'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['t21'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('21-subjettiness', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['t21'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['t21'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
bottom = 0.18
upper = 0.275
observed = list(filter(lambda a: a >= bottom and a <= upper, lowLumi['KtDeltaR'][:]))
background = list(filter(lambda a: a >= bottom and a <= upper, qcd['KtDeltaR'][:]))
signal = list(filter(lambda a: a >= bottom and a <= upper, higgs['KtDeltaR'][:]))
bins = 500
plt.hist(observed[:], bins, alpha=0.4, color='green', density = True, label="Observed Data")
plt.hist(signal[:], bins, alpha=0.4, color="red", density = True, label="Expected Signal")
plt.hist(background[:], bins, alpha=0.4, density = True, color="blue", label="Expected Background")
plt.legend(loc="upper right", fontsize=30)
plt.tick_params(labelsize = 22)
plt.title('KtDeltaR', fontsize=40)
plt.show()
nhiggs = (sum(signal[:]) / sum(higgs['KtDeltaR'][:]))*50
nqcd_root = ((sum(observed[:])/sum(lowLumi['KtDeltaR'][:]))*2000)**(0.5)
ratio = nhiggs / nqcd_root
print("Observed Significance: ", ratio, "sigma")
Optimal cuts on the select 10 features have been made based on QCD expected background data, just like in the high luminosity example. By making these cuts, and getting a closer look at the plots, it highlights the spiky nature of the low luminosity dataset. This means across the select features, the data points are discretized to certain values.
| QCD Expected Background(sigma) | High Luminosity Observed(sigma) | Low Luminosity Observed(sigma) | |
|---|---|---|---|
| mass | 3.686 | 2.7658 | 2.7037 |
| eta | 11.8528 | 37.256 | 22.3898 |
| ee2 | 2.314 | 2.180 | 2.184 |
| ee3 | 4.955 | 4.722 | 4.426 |
| d2 | 2.508 | 2.402 | 2.438 |
| angularity | 2.146 | 2.070 | 2.115 |
| t2 | 1.857 | 1.800 | 1.867 |
| t3 | 2.1084 | 2.0000 | 2.0851 |
| t21 | 1.8464 | 1.814 | 1.832 |
| KtDeltaR | 2.2456 | 1.8715 | 1.7341 |
Once again the eta value scores an absurd significance of 22.3898 sigma, and hence will be ignored in the comparison. It is interesting to me that across all 3 datasets, I got really high significance values for eta. Just like in the high luminosity example, the observed significance for low luminosity is less than all 9 remaining select features for the expected significance. This makes sense because the cuts were done based on the QCD background data set and not the low luminosity data. Comparing high lumi vs low lumi: out of the 9 remaining features 6 have a higher significance for the low luminosity data. The differences in observed significance vary from as little as ~0.02, to as large as ~0.3 sigma. There is no clear trend between the two datasets on the observed significance, and the differences can be written off as statistical variation. The 3-point ECF ratio(ee3) continues to be the highest observed significance value.
confidence_signal = np.percentile(higgs[:], 95)
print('95% confidence upperbound of the expected signal is:', confidence_signal)
confidence_background = np.percentile(qcd[:], 95)
print('95% confidence upperbound of the expected background is:', confidence_background)
confidence_observed = np.percentile(lowLumi[:], 95)
print('95% confidence upperbound of the observed data is:', confidence_observed)
The 95% confidence upperbound for the observed low luminosity data is ~7 higher than the expected background. This tells me that generally the observed data at the 95th percentile is closer to the expected signal than the expected background. Also if I observe a candidate signal (5σ event), and it is too weak to claim a detection, then the true signal would be less than 1070.367 (95% confidence upper bound of observed data) 95% of the time.
The 1 sigma uncertainty of the expected signal & background 95% confidence upper limit is +/- 32.578 andd 32.572 respectively. This shows us that the expected confidence is a good approximation off the observed confidence, because the observed confidence is in less than one sigma of the expected.
p = stats.norm.cdf(1) #probability of one sigma
val = stats.poisson.ppf(p, confidence_signal) #1 sigma uncertainty about the expected 95% confidence upper limit
uncertainty = abs(confidence_signal - val)
print("Expected signal: ", '%.3f'%(confidence_signal), " has a 1σ uncertainty of +/- ", '%.3f'%(uncertainty))
val = stats.poisson.ppf(p, confidence_background)
uncertainty = abs(confidence_background - val)
print("Expected background: ", '%.3f'%(confidence_background), " has a 1σ uncertainty of +/- ", '%.3f'%(uncertainty))